#!/bin/bash

# Abort on the first failing command ...
set -o errexit
# ... and treat a pipeline as failed when ANY of its stages fails, not only
# the last one. Without this, a failing `join`/`find` feeding into `sort`
# would go unnoticed because `sort` itself exits 0.
set -o pipefail
# Force the C locale so that sort and join agree on a byte-wise collation
# order.
export LC_ALL=C

#export TMP_DATA_PATH=/tmp/mixagol/annotation_cluster

#export BASE_DATA_PATH=/home/mixagol/work/data
# Root directory of the reference data; the KEGG and pathway tables used
# below are looked up underneath it.
BASE_DATA_PATH=/home/mixagol/data
# Snapshot date used as a path component when locating those tables.
# Previously used snapshot: 20111030
BASE_DATE=20120802
# Export both so child processes inherit them.
export BASE_DATA_PATH BASE_DATE

#export GENOME_ID=$1
#export AN_DIR=~/work/data/annotation_cluster/${GENOME_ID}

# Annotation directory to process, taken from the first command-line
# argument and exported so child processes inherit it.
export AN_DIR=${1:-}

# A missing argument must be rejected explicitly: an unquoted, empty
# expansion inside `[ ! -d ... ]` collapses to `[ ! -d ]`, which evaluates
# to false and would let the script continue with an empty AN_DIR.
if [ -z "${AN_DIR}" ]; then
    echo "Usage: $0 <annotation_dir>" >&2
    exit 1
fi

# Report the directory that was actually checked (the former message printed
# GENOME_ID, which is never set in this script).
if [ ! -d "${AN_DIR}" ]; then
    echo "Annotation directory '${AN_DIR}' does not exist!" >&2
    exit 1
fi

#hostname > ${AN_DIR}/worker_hostname

##
## DATABASES
##

# Build ${AN_DIR}/genes_pathways.txt once; the step is skipped when the file
# already exists.
#
# Pipeline outline (all inputs are tab-separated):
#   1. Columns 1-2 of genes_info.txt are sorted on column 2 and joined (on
#      that column) against column 1 of ncbi2kegg.txt; fields 2-3 of the
#      result are kept and sorted on the second of them.
#   2. pathway_kegg_map_full.txt is joined with pathways_full.txt on their
#      first columns; fields 1,2,3,5 are kept and sorted on field 2.
#   3. (1) and (2) are joined on their second fields, the join key is dropped
#      (cut -f2-) and the rows are sorted on the first remaining column.
#
# NOTE(review): ncbi2kegg.txt, pathway_kegg_map_full.txt and pathways_full.txt
# are not sorted here, so they are presumably pre-sorted on their join column
# under LC_ALL=C -- confirm.
genes_pathways="${AN_DIR}/genes_pathways.txt"

if [ ! -f "${genes_pathways}" ]; then

    kegg_dir="${BASE_DATA_PATH}/7_aux/kegg/${BASE_DATE}"
    pathways_dir="${BASE_DATA_PATH}/5_pathways/${BASE_DATE}"

    # Write to a temporary name and rename only on success. Writing straight
    # to the final name would leave a partial file behind after a failure or
    # an interrupt, and the existence check above would then treat it as
    # complete on every later run.
    genes_pathways_tmp="${genes_pathways}.tmp.$$"

    # pipefail is enabled inside a subshell so the option does not leak into
    # the rest of the script. It covers the outer join | cut | sort pipeline;
    # failures inside the <( ... ) process substitutions are still not
    # reflected in the exit status.
    if ! (
        set -o pipefail
        join -t$'\t' -j 2 \
            <(join -t$'\t' -1 2 -2 1 \
                <(cut -f1,2 "${AN_DIR}/genes_info.txt" | sort -t$'\t' -k2,2 -T . -S1G) \
                "${kegg_dir}/ncbi2kegg.txt" \
                | cut -f2,3 \
                | sort -t$'\t' -k2,2 -T . -S1G \
            ) \
            <(join -t$'\t' \
                "${pathways_dir}/pathway_kegg_map_full.txt" \
                "${pathways_dir}/pathways_full.txt" \
                | cut -f1,2,3,5 \
                | sort -t$'\t' -k2,2 -T . -S1G \
            ) \
            | cut -f2- \
            | sort -t$'\t' -k1,1 -T . -S1G \
            > "${genes_pathways_tmp}"
    ); then
        rm -f -- "${genes_pathways_tmp}"
        echo "Failed to build ${genes_pathways}" >&2
        exit 1
    fi

    mv -- "${genes_pathways_tmp}" "${genes_pathways}"

fi

##
## Annotate genes
##

# Full run: invoke 6_annotation/annotate_pathway.py once per directory named
# "ann*" found under ${AN_DIR}, one at a time (-n1 -P1), in sorted path order.
# The script path is relative, so this must be started from the directory
# that contains 6_annotation/.
# Paths travel NUL-delimited so that directory names containing whitespace,
# quotes or backslashes reach the script intact.
find "${AN_DIR}/" -type d -name "ann*" -print0 \
    | sort -z \
    | xargs -0 -n1 -P1 6_annotation/annotate_pathway.py

# rm -rf ${CUR_DATA_DIR}

#9_annotation_analysis/create_results_exact_pathway.sh $AN_DIR

